####
#Replication file for Holland and Rios; Violence and Business Interest in Social Welfare: Evidence from Mexico
####
rm(list = ls())
#setwd("~/Dropbox/Drafts")

library(stm)
library(readstata13)
library(dplyr)
library(grid)
library(ggplot2)
library(gridExtra)
library(SuperLearner)
set.seed(150)

#Upload data
data <- read.dta13("Mex_FinalData.dta")

#Change to have the same direcion than "positive" questions
data$op_isr <- recode(data$op_isr, `1` = "7.Strongly agree", `2` = "6.Agree", `3` = "5.Sometimes agree", `4` = "4.Neutral", `5` = "3.Somewhat disagree", 
                      `6` = "2.Disagree", `7` = "1.Strongly disagree")
data$op_imss <- recode(data$op_imss, `1` = "7.Strongly agree", `2` = "6.Agree", `3` = "5.Sometimes agree", `4` = "4.Neutral", `5` = "3.Somewhat disagree", 
                       `6` = "2.Disagree", `7` = "1.Strongly disagree")

########################################
#Table 1: Comparing Sample to National Business Census
########################################

  #By economic sector
  sector <- recode(data$code_type, `1` = "agriculture", `2` = "manufacture", `3` = "commerce", `4` = "services")
  table(sector)/length(sector)
  #By size 
  firmsize <- recode(data$code_employ, `1` = "<11", `2` = "11-50", `3` = "11-50", `4` = "51-250", `5` = "51-250", `6` = ">250")
  table(firmsize)/length(firmsize)
  #Census data extracted from www.inegi.gob.mx
  
#Figure 1: Distribution of responses
  
  #Plot
  plot1 <- ggplot(data, aes(x=op_poverty)) + geom_bar(aes(y = (..count..)/sum(..count..))) + coord_flip() + 
    labs(title="More public resources should be allocated to increasing social programs intended to reduce poverty", x = "", y="") + 
    theme(plot.title = element_text(size = 8))
  plot2 <- ggplot(data, aes(x=op_education)) + geom_bar(aes(y = (..count..)/sum(..count..)))  + coord_flip() + 
    labs(title="More public resources should be allocated to improving the quality of public education", x = "", y="")+ 
            theme(plot.title = element_text(size = 8))
  plot3 <- ggplot(data, aes(x=op_imss)) + geom_bar(aes(y = (..count..)/sum(..count..))) + coord_flip() + 
    labs(title="Social security is a waste of money. I prefer to hire contracted laborers", x = "", y="")+ 
    theme(plot.title = element_text(size = 8))
  plot4 <- ggplot(data, aes(x=op_isr)) + geom_bar(aes(y = (..count..)/sum(..count..))) + coord_flip() + 
    labs(title="Income taxes should decrease even if it reduces public spending on social programs", x = "", y="")+ 
    theme(plot.title = element_text(size = 8))
  
  #Grid
  grid.arrange(plot1, plot2, plot3, plot4, ncol=2, nrow=2)
  
########################################
#Figure 3: Marginal Conditional Average Treatment Effects
######################################
  
  #Load dataset with cathegorical variables
  #This dataset changed op_isr to have the same direcion than "positive" questions; it also works with cathegorical variables
  data <- read.csv("HR_DataSuperlearner.csv")
  

  #Only work with the experimental data of violence treatment
  data<-data[data$treat_corr==0,]
  
  #Interactions with treatment of violence
  VioExp<-data$treat_vio*data$export
  VioEduc<-data$treat_vio*data$EducCat
  VioAct<-data$treat_vio*data$ActCat
  VioHom17<-data$treat_vio*data$Hom17Cat
  VioEmloy<-data$treat_vio*data$employCat
  data<-cbind(data,VioExp, VioEduc,VioAct,VioHom17,VioEmloy)
  
  #Divide dataset in train and test
  x_train<-sample(nrow(data),500)
  train<-data[x_train,]
  test<-data[-x_train,]
  
  # Create validation and training set
  #This changes when measuring a different dependent variable (poverty is variable 3, ISR is 4).
  #Poverty
  #y <-as.numeric(train[,3])
  #ISR corporate tax
  y<-as.numeric(train[,4])

  #Take out corruption treatment and y's
  x<-data.frame(train[,-c(2:4)])
  colnames(x)<-c('Treat', 'Export', 'Educ', 'Act', 'Hom17Cat', 'Employ', 'VioExp', 'VioEduc', 'VioAct', 'VioHom17', 'VioEmploy')
  xtest<-data.frame(test[,-c(2:4)])
  colnames(xtest)<-c('Treat', 'Export', 'Educ', 'Act', 'Hom17Cat', 'Employ', 'VioExp', 'VioEduc', 'VioAct', 'VioHom17', 'VioEmploy')

  # Create the tuned model
  model.L <- SuperLearner(y,
                          x,
                          newX=xtest,
                          SL.library=list("SL.bartMachine", "SL.glmnet", "SL.ranger", "SL.ksvm","SL.ipredbagg","SL.bayesglm"))
  #model.L$coef
  #model.L$cvRisk
  #Model.L changes according to train sample. The values used to get the results reported are reported at SuperlearnerParameters.xlsx
  
  #Heterogeneous
  
  ##Export
  #treat export
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-1
  newObs$Export<-1
  newObs$VioExp<-1
  newObs$VioEduc<-1*newObs$Educ
  newObs$VioAct<-1*newObs$Act
  newObs$VioHom17<-1*newObs$Hom17Cat
  newObs$VioEmploy<-1*newObs$Employ
  
  predictions.VioExp<- predict.SuperLearner(model.L, newdata=newObs)
  predVioExp<-predictions.VioExp$pred
  
  #treat no export
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-1
  newObs$Export<-0
  newObs$VioExp<-0
  newObs$VioEduc<-1*newObs$Educ
  newObs$VioAct<-1*newObs$Act
  newObs$VioHom17<-1*newObs$Hom17Cat
  newObs$VioEmploy<-1*newObs$Employ
  
  predictions.VioNoExp<- predict.SuperLearner(model.L, newdata=newObs)
  predVioNoExp<-predictions.VioNoExp$pred
  
  
  # No treat export
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-0
  newObs$Export<-1
  newObs$VioExp<-0
  newObs$VioEduc<-0
  newObs$VioAct<-0
  newObs$VioHom17<-0
  newObs$VioEmploy<-0
  
  predictions.NoVioExp<- predict.SuperLearner(model.L, newdata=newObs)
  predNoVioExp<-predictions.NoVioExp$pred
  
  #no treat no export
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-0
  newObs$Export<-0
  newObs$VioExp<-0
  newObs$VioEduc<-0
  newObs$VioAct<-0
  newObs$VioHom17<-0
  newObs$VioEmploy<-0
  
  predictions.NoVioNoExp<- predict.SuperLearner(model.L, newdata=newObs)
  predNoVioNoExp<-predictions.NoVioNoExp$pred
  
  ## Educ
  
  #treat educ
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-1
  newObs$Educ<-1
  newObs$VioEduc<-1
  newObs$VioExp<-1*newObs$Export
  newObs$VioAct<-1*newObs$Act
  newObs$VioHom17<-1*newObs$Hom17Cat
  newObs$VioEmploy<-1*newObs$Employ
  
  predictions.VioEduc<- predict.SuperLearner(model.L, newdata=newObs)
  predVioEduc<-predictions.VioEduc$pred
  
  #treat no educ
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-1
  newObs$Educ<-0
  newObs$VioEduc<-0
  newObs$VioExp<-1*newObs$Export
  newObs$VioAct<-1*newObs$Act
  newObs$VioHom17<-1*newObs$Hom17Cat
  newObs$VioEmploy<-1*newObs$Employ
  
  predictions.VioNoEduc<- predict.SuperLearner(model.L, newdata=newObs)
  predVioNoEduc<-predictions.VioNoEduc$pred
  
  
  # No treat educ
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-0
  newObs$Educ<-1
  newObs$VioExp<-0
  newObs$VioEduc<-0
  newObs$VioAct<-0
  newObs$VioHom17<-0
  newObs$VioEmploy<-0
  
  predictions.NoVioEduc<- predict.SuperLearner(model.L, newdata=newObs)
  predNoVioEduc<-predictions.NoVioEduc$pred
  
  #no treat no export
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-0
  newObs$Educ<-0
  newObs$VioExp<-0
  newObs$VioEduc<-0
  newObs$VioAct<-0
  newObs$VioHom17<-0
  newObs$VioEmploy<-0
  
  predictions.NoVioNoEduc<- predict.SuperLearner(model.L, newdata=newObs)
  predNoVioNoEduc<-predictions.NoVioNoEduc$pred
  
  ## Act
  #treat act
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-1
  newObs$Act<-1
  newObs$VioAct<-1
  newObs$VioExp<-1*newObs$Export
  newObs$VioEduc<-1*newObs$Educ
  newObs$VioHom17<-1*newObs$Hom17Cat
  newObs$VioEmploy<-1*newObs$Employ
  
  predictions.VioAct<- predict.SuperLearner(model.L, newdata=newObs)
  predVioAct<-predictions.VioAct$pred
  
  #treat no act
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-1
  newObs$Act<-0
  newObs$VioAct<-0
  newObs$VioExp<-1*newObs$Export
  newObs$VioEduc<-1*newObs$Educ
  newObs$VioHom17<-1*newObs$Hom17Cat
  newObs$VioEmploy<-1*newObs$Employ
  
  predictions.VioNoAct<- predict.SuperLearner(model.L, newdata=newObs)
  predVioNoAct<-predictions.VioNoAct$pred
  
  
  # No treat act
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-0
  newObs$Act<-1
  newObs$VioExp<-0
  newObs$VioEduc<-0
  newObs$VioAct<-0
  newObs$VioHom17<-0
  newObs$VioEmploy<-0
  
  predictions.NoVioAct<- predict.SuperLearner(model.L, newdata=newObs)
  predNoVioAct<-predictions.NoVioAct$pred
  
  #no treat no act
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-0
  newObs$Act<-0
  newObs$VioExp<-0
  newObs$VioEduc<-0
  newObs$VioAct<-0
  newObs$VioHom17<-0
  newObs$VioEmploy<-0
  
  predictions.NoVioNoAct<- predict.SuperLearner(model.L, newdata=newObs)
  predNoVioNoAct<-predictions.NoVioNoAct$pred
  
  ## Hom17
  #treat hom17
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-1
  newObs$Hom17Cat<-1
  newObs$VioHom17<-1
  newObs$VioExp<-1*newObs$Export
  newObs$VioEduc<-1*newObs$Educ
  newObs$VioAct<-1*newObs$Act
  newObs$VioEmploy<-1*newObs$Employ
  
  predictions.VioHom17<- predict.SuperLearner(model.L, newdata=newObs)
  predVioHom17<-predictions.VioHom17$pred
  
  #treat no hom17
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-1
  newObs$Hom17Cat<-0
  newObs$VioHom17<-0
  newObs$VioExp<-1*newObs$Export
  newObs$VioEduc<-1*newObs$Educ
  newObs$VioAct<-1*newObs$Act
  newObs$VioEmploy<-1*newObs$Employ
  
  newObs$VioExp<-1*newObs$Export
  newObs$VioEduc<-1*newObs$Educ
  newObs$VioAct<-1*newObs$Act
  newObs$VioHom17<-1*newObs$Hom17Cat
 
   predictions.VioNoHom17<- predict.SuperLearner(model.L, newdata=newObs)
  predVioNoHom17<-predictions.VioNoHom17$pred
  
  
  # No treat hom17
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-0
  newObs$Hom17Cat<-1
  newObs$VioExp<-0
  newObs$VioEduc<-0
  newObs$VioAct<-0
  newObs$VioHom17<-0
  newObs$VioEmploy<-0
  
  predictions.NoVioHom17<- predict.SuperLearner(model.L, newdata=newObs)
  predNoVioHom17<-predictions.NoVioHom17$pred
  
  #no treat no hom17
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-0
  newObs$Hom17Cat<-0
  newObs$VioExp<-0
  newObs$VioEduc<-0
  newObs$VioAct<-0
  newObs$VioHom17<-0
  newObs$VioEmploy<-0
  
  predictions.NoVioNoHom17<- predict.SuperLearner(model.L, newdata=newObs)
  predNoVioNoHom17<-predictions.NoVioNoHom17$pred
  
  
  ## Employ
  #treat employ
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-1
  newObs$Employ<-1
  newObs$VioEmploy<-1
  
  predictions.VioEmploy<- predict.SuperLearner(model.L, newdata=newObs)
  predVioEmploy<-predictions.VioEmploy$pred
  
  #treat no employ
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-1
  newObs$Employ<-0
  newObs$VioEmploy<-0
  newObs$VioExp<-1*newObs$Export
  newObs$VioEduc<-1*newObs$Educ
  newObs$VioAct<-1*newObs$Act
  newObs$VioHom17<-1*newObs$Hom17Cat
  
  predictions.VioNoEmploy<- predict.SuperLearner(model.L, newdata=newObs)
  predVioNoEmploy<-predictions.VioNoEmploy$pred
  
  
  # No treat employ
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-0
  newObs$Employ<-1
  newObs$VioExp<-0
  newObs$VioEduc<-0
  newObs$VioAct<-0
  newObs$VioHom17<-0
  newObs$VioEmploy<-0
  
  predictions.NoVioEmploy<- predict.SuperLearner(model.L, newdata=newObs)
  predNoVioEmploy<-predictions.NoVioEmploy$pred
  
  #no treat no EMPLOY
  newObs <- data.frame(t(colMeans(xtest)))
  newObs$Treat<-0
  newObs$Employ<-0
  newObs$VioExp<-0
  newObs$VioEduc<-0
  newObs$VioAct<-0
  newObs$VioHom17<-0
  newObs$VioEmploy<-0
  
  predictions.NoVioNoEmploy<- predict.SuperLearner(model.L, newdata=newObs)
  predNoVioNoEmploy<-predictions.NoVioNoEmploy$pred
  
  pred_vioisr<-rbind(predVioExp,predVioNoExp,predVioEduc,predVioNoEduc,predVioAct,predVioNoAct,
                     predVioHom17,predVioNoHom17,predVioEmploy,predVioNoEmploy)
  pred_Novioisr<-rbind(predNoVioExp,predNoVioNoExp,predNoVioEduc,predNoVioNoEduc,predNoVioAct,predNoVioNoAct,
                     predNoVioHom17,predNoVioNoHom17,predNoVioEmploy,predNoVioNoEmploy)
  predisr <- pred_vioisr - pred_Novioisr
  write.csv(predisr, file = "Figure3.csv")
  #The code of Figure 3 needs to be run twice, once for SR and once for poverty. 

#Plot Figure 3    
data <- read.csv("Figure3.csv")

data$Category<-as.character(data$Category)
data$Category<-factor(data$Category, levels=c("No export","Export", "Low edu", "High edu", "Other act", "Primaries", "Low homic", "High homic", "Large firm", "Small firm"))

poverty<-ggplot()+geom_point(aes(x=op_poverty, y=Category, size=size), data)+ 
  scale_x_continuous(breaks=c(-.05,0,.3))+scale_size_continuous(range = c(1, 5))+ geom_line(aes(x=op_poverty, y=Category, group=Group), data)+ 
  geom_vline(xintercept = 0)+facet_wrap(Group ~ ., ncol = 1, scales="free_y") + 
  xlab("Poverty")+theme(strip.background = element_blank(), strip.text.x = element_blank(), legend.position="none", 
                        axis.title.y  = element_blank(), plot.margin=unit(c(1,0.3,1,0), "cm"))

isr<-ggplot()+geom_point(aes(x=op_isr, y=Category, size=size), data)+ 
  scale_x_continuous(breaks=c(-.2,0,.8))+scale_size_continuous(range = c(1, 5))+ 
  geom_line(aes(x=op_isr, y=Category, group=Group), data)+ geom_vline(xintercept = 0)+facet_wrap(Group ~ ., ncol = 1, scales="free_y") + 
  xlab("ISR")+theme(strip.background = element_blank(), strip.text.x = element_blank(), legend.position="none", 
                    axis.title.y  = element_blank(), plot.margin=unit(c(1,0.3,1,0), "cm"))

grid.arrange(poverty,isr, nrow=1, ncol=2)

